import random

def split_dataset(input_file, train_file, validation_file, validation_ratio=0.1, seed=None):
    """Randomly split the lines of a text file into train and validation files.

    Every line of ``input_file`` is read, the lines are shuffled, and the
    first ``int(total * validation_ratio)`` shuffled lines are written to
    ``validation_file``; the remainder is written to ``train_file``. Both
    output files are overwritten if they already exist.

    Args:
        input_file: Path of the UTF-8 text file to split, one record per line.
        train_file: Path the training lines are written to.
        validation_file: Path the validation lines are written to.
        validation_ratio: Fraction of lines (between 0 and 1 inclusive) that
            go to the validation file. The count is truncated, not rounded.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` the module-level ``random`` state is
            used, so a prior ``random.seed(...)`` call is still honoured.

    Raises:
        ValueError: If ``validation_ratio`` is not within ``[0, 1]``.
        OSError: If a file cannot be opened for reading or writing.
    """
    # Reject bad ratios before touching any file; a negative value would
    # otherwise turn into a negative slice index and silently mis-split.
    if not 0 <= validation_ratio <= 1:
        raise ValueError(
            f"validation_ratio must be between 0 and 1, got {validation_ratio!r}"
        )

    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # A file without a final newline leaves the last record unterminated.
    # Once shuffled, that record would be glued to whichever line follows it
    # in the output, so terminate it before shuffling.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(lines)

    validation_size = int(len(lines) * validation_ratio)

    with open(validation_file, 'w', encoding='utf-8') as f:
        f.writelines(lines[:validation_size])

    with open(train_file, 'w', encoding='utf-8') as f:
        f.writelines(lines[validation_size:])

if __name__ == "__main__":
    # Script entry point: split the combined list under dataset_wavs/NickYuan
    # into a training list and a validation list, holding out 10% of lines.
    source_list = "dataset_wavs/NickYuan/all_train_lists.txt"
    train_list = "dataset_wavs/NickYuan/train_lists.txt"
    held_out_list = "dataset_wavs/NickYuan/validation_lists.txt"

    split_dataset(
        input_file=source_list,
        train_file=train_list,
        validation_file=held_out_list,
        validation_ratio=0.1,
    )
    print("Dataset split completed.")
